{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "#import lightgbm as lgb\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import log_loss\n",
    "from sklearn import preprocessing\n",
    "import matplotlib.pyplot as plt\n",
    "import warnings\n",
    "\n",
    "# Render matplotlib figures inline, directly beneath the cell that draws them.\n",
    "%matplotlib inline \n",
    "# NOTE(review): this silences every warning category for the whole session,\n",
    "# including library deprecation notices - narrow the filter if those matter.\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse both input files with runs of whitespace as the column delimiter.\n",
    "# The separator is a raw string so the regex backslash is passed through\n",
    "# verbatim instead of relying on Python's invalid-escape fallback for \"\\s\".\n",
    "train = pd.read_csv(\"input/train.txt\", sep=r\"\\s+\")\n",
    "test = pd.read_csv(\"input/test.txt\", sep=r\"\\s+\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instance_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_category_list</th>\n",
       "      <th>item_property_list</th>\n",
       "      <th>item_brand_id</th>\n",
       "      <th>item_city_id</th>\n",
       "      <th>item_price_level</th>\n",
       "      <th>item_sales_level</th>\n",
       "      <th>item_collected_level</th>\n",
       "      <th>item_pv_level</th>\n",
       "      <th>...</th>\n",
       "      <th>context_page_id</th>\n",
       "      <th>predict_category_property</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>shop_review_num_level</th>\n",
       "      <th>shop_review_positive_rate</th>\n",
       "      <th>shop_star_level</th>\n",
       "      <th>shop_score_service</th>\n",
       "      <th>shop_score_delivery</th>\n",
       "      <th>shop_score_description</th>\n",
       "      <th>is_trade</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>108641074714126964</td>\n",
       "      <td>3412720377098676069</td>\n",
       "      <td>7908382889764677758;5799347067982556520</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>1975590437749032870</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>4006</td>\n",
       "      <td>5799347067982556520:-1;509660095530134768:-1;5...</td>\n",
       "      <td>6765930309048922341</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5754713551599725161</td>\n",
       "      <td>3412720377098676069</td>\n",
       "      <td>7908382889764677758;5799347067982556520</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>1975590437749032870</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>5799347067982556520:9172976955054793469;790838...</td>\n",
       "      <td>6765930309048922341</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>842679481291040981</td>\n",
       "      <td>3412720377098676069</td>\n",
       "      <td>7908382889764677758;5799347067982556520</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>1975590437749032870</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>5799347067982556520:5131280576272319091;725801...</td>\n",
       "      <td>6765930309048922341</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>937088850059189027</td>\n",
       "      <td>3412720377098676069</td>\n",
       "      <td>7908382889764677758;5799347067982556520</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>1975590437749032870</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>4016</td>\n",
       "      <td>509660095530134768:-1;5799347067982556520:-1;7...</td>\n",
       "      <td>6765930309048922341</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7975697065017708072</td>\n",
       "      <td>3412720377098676069</td>\n",
       "      <td>7908382889764677758;5799347067982556520</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>1975590437749032870</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>5799347067982556520:9172976955054793469;790838...</td>\n",
       "      <td>6765930309048922341</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the training frame.\n",
    "train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x19ec1b72d68>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEnJJREFUeJzt3HGs3eV93/H3Z3ZCXCgJlOrKstFwNauTga6Lrxhbu+hG\ndMNNqplKG3KVDndC+A9Ymk5Mm1mlpfvDEplGt5IOJK9kmBbFc9NUttaylbpcRfsDKCQkxlAPp5hg\nz+C2aUIddTSm3/1xHq8n97Gxfc6Nz7nX75d0dJ7z/H7Pc57v/fn6c8/vd85JVSFJ0rC/NukFSJKm\nj+EgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkzspJL2BU11xzTV133XUjjf3Wt77F\n5ZdfvrgLmhBrmT7LpQ6wlmk1Ti3PP//8H1fV959zx6p61xvwGeAE8OJQ39XAk8Ar7f6qoW33AYeB\nQ8CtQ/0bgQNt24NAWv9lwH9r/c8A151rTVXFxo0ba1RPPfXUyGOnjbVMn+VSR5W1TKtxagGeq/P4\nP/Z8Tis9Cmxa0Lcd2F9V64H97TFJNgBbgOvbmIeSrGhjHgbuAta32+k57wT+tKr+BvAfgU+dx5ok\nSd9F5wyHqvoC8PUF3ZuBXa29C7htqH93Vb1dVa8yeDVwU5LVwJVV9XRLrscWjDk91+eAW5Jk1IIk\nSeMb9YL0TFUdb+03gJnWXgO8PrTf0da3prUX9n/HmKo6BXwT+L4R1yVJWgRjX5CuqkpyUb73O8k2\nYBvAzMwM8/PzI81z8uTJkcdOG2uZPsulDrCWaXUxahk1HN5MsrqqjrdTRida/zHg2qH91ra+Y629\nsH94zNEkK4H3A39ypietqp3AToDZ2dmam5sbafHz8/OMOnbaWMv0WS51gLVMq4tRy6inlfYBW1t7\nK7B3qH9LksuSrGNw4fnZdgrqrSQ3t+sJdywYc3qufwz8XrsuIUmakHO+ckjyWWAOuCbJUeCTwP3A\nniR3Aq8BtwNU1cEke4CXgFPAPVX1TpvqbgbvfFoFPNFuAI8Av5rkMIML31sWpTJJ0sjOGQ5V9VNn\n2XTLWfbfAew4Q/9zwA1n6P+/wD851zokSRePX58hSeos2a/PGMeBY9/kZ7b/1kSe+8j9H53I80rS\nhfCVgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqG\ngySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySp\nYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjpjhUOSf5HkYJIXk3w2yfuSXJ3kySSvtPur\nhva/L8nhJIeS3DrUvzHJgbbtwSQZZ12SpPGMHA5J1gA/C8xW1Q3ACmALsB3YX1Xrgf3tMUk2tO3X\nA5uAh5KsaNM9DNwFrG+3TaOuS5I0vnFPK60EViVZCXwP8H+AzcCutn0XcFtrbwZ2V9XbVfUqcBi4\nKclq4MqqerqqCnhsaIwkaQJWjjqwqo4l+Q/A14A/B36nqn4nyUxVHW+7vQHMtPYa4OmhKY62vm+3\n9sL+TpJtwDaAmZkZ5ufnR1r7zCq498ZTI40d16hrPpuTJ08u+pyTslxqWS51gLVMq4tRy8jh0K4l\nbAbWAd8Afj3JTw/vU1WVpMZb4nfMtxPYCTA7O1tzc3MjzfPpx/fywIGRSx/LkY/NLep88/PzjPpz\nmDbLpZblUgdYy7S6GLWMc1rpx4BXq+qPqurbwOeBvwe82U4V0e5PtP2PAdcOjV/b+o619sJ+SdKE\njBMOXwNuTvI97d1FtwAvA/uArW2frcDe1t4HbElyWZJ1DC48P9tOQb2V5OY2zx1DYyRJEzDONYdn\nknwO+CJwCvgSg1M+VwB7ktwJvAbc3v
Y/mGQP8FLb/56qeqdNdzfwKLAKeKLdJEkTMtaJ96r6JPDJ\nBd1vM3gVcab9dwA7ztD/HHDDOGuRJC0ePyEtSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoY\nDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKk\njuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEg\nSeqMFQ5JPpDkc0n+IMnLSf5ukquTPJnklXZ/1dD+9yU5nORQkluH+jcmOdC2PZgk46xLkjSecV85\n/BLwP6rqbwJ/C3gZ2A7sr6r1wP72mCQbgC3A9cAm4KEkK9o8DwN3AevbbdOY65IkjWHkcEjyfuBD\nwCMAVfUXVfUNYDOwq+22C7ittTcDu6vq7ap6FTgM3JRkNXBlVT1dVQU8NjRGkjQB47xyWAf8EfBf\nk3wpya8kuRyYqarjbZ83gJnWXgO8PjT+aOtb09oL+yVJE7JyzLEfBD5eVc8k+SXaKaTTqqqS1DgL\nHJZkG7ANYGZmhvn5+ZHmmVkF9954arGWdUFGXfPZnDx5ctHnnJTlUstyqQOsZVpdjFrGCYejwNGq\neqY9/hyDcHgzyeqqOt5OGZ1o248B1w6NX9v6jrX2wv5OVe0EdgLMzs7W3NzcSAv/9ON7eeDAOKWP\n7sjH5hZ1vvn5eUb9OUyb5VLLcqkDrGVaXYxaRj6tVFVvAK8n+cHWdQvwErAP2Nr6tgJ7W3sfsCXJ\nZUnWMbjw/Gw7BfVWkpvbu5TuGBojSZqAcf98/jjweJL3An8I/DMGgbMnyZ3Aa8DtAFV1MMkeBgFy\nCrinqt5p89wNPAqsAp5oN0nShIwVDlX1AjB7hk23nGX/HcCOM/Q/B9wwzlokSYvHT0hLkjqGgySp\nYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhI\nkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqG\ngySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpM3Y4JFmR5EtJ/nt7fHWSJ5O80u6vGtr3viSH\nkxxKcutQ/8YkB9q2B5Nk3HVJkka3GK8cPgG8PPR4O7C/qtYD+9tjkmwAtgDXA5uAh5KsaGMeBu4C\n1rfbpkVYlyRpRGOFQ5K1wEeBXxnq3gzsau1dwG1D/bur6u2qehU4DNyUZDVwZVU9XVUFPDY0RpI0\nAeO+cvhPwL8C/nKob6aqjrf2G8BMa68BXh/a72jrW9PaC/slSROyctSBSX4COFFVzyeZO9M+VVVJ\natTnOMNzbgO2AczMzDA/Pz/SPDOr4N4bTy3Wsi7IqGs+m5MnTy76nJOyXGpZLnWAtUyri1HLyOEA\n/Ajwj5J8BHgfcGWSXwPeTLK6qo63U0Yn2v7HgGuHxq9tfcdae2F/p6p2AjsBZmdna25ubqSFf/rx\nvTxwYJzSR3fkY3OLOt/8/Dyj/hymzXKpZbnUAdYyrS5GLSOfVqqq+6pqbVVdx+BC8+9V1U8D+4Ct\nbbetwN7W3gdsSXJZknUMLjw/205BvZXk5vYupTuGxkiSJuC78efz/cCeJHcCrwG3A1TVwSR7gJeA\nU8A9VfVOG3M38CiwCnii3SRJE7Io4VBV88B8a/8JcMtZ9tsB7DhD/3PADYuxFknS+PyEtCSpYzhI\nkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqG\ngySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjqGgySp\nYz
hIkjqGgySpYzhIkjqGgySpYzhIkjqGgySpYzhIkjojh0OSa5M8leSlJAeTfKL1X53kySSvtPur\nhsbcl+RwkkNJbh3q35jkQNv2YJKMV5YkaRzjvHI4BdxbVRuAm4F7kmwAtgP7q2o9sL89pm3bAlwP\nbAIeSrKizfUwcBewvt02jbEuSdKYRg6HqjpeVV9s7T8DXgbWAJuBXW23XcBtrb0Z2F1Vb1fVq8Bh\n4KYkq4Erq+rpqirgsaExkqQJyOD/4zEnSa4DvgDcAHytqj7Q+gP8aVV9IMkvA09X1a+1bY8ATwBH\ngPur6sda/98H/nVV/cQZnmcbsA1gZmZm4+7du0da74mvf5M3/3ykoWO7cc37F3W+kydPcsUVVyzq\nnJOyXGpZLnWAtUyrcWr58Ic//HxVzZ5rv5UjzT4kyRXAbwA/V1VvDV8uqKpKMn76/NV8O4GdALOz\nszU3NzfSPJ9+fC8PHBi79JEc+djcos43Pz/PqD+HabNcalkudYC1TKuLUctY71ZK8h4GwfB4VX2+\ndb/ZThXR7k+0/mPAtUPD17a+Y629sF+SNCHjvFspwCPAy1X1i0Ob9gFbW3srsHeof0uSy5KsY3Dh\n+dmqOg68leTmNucdQ2MkSRMwzrmVHwH+KXAgyQut798A9wN7ktwJvAbcDlBVB5PsAV5i8E6ne6rq\nnTbubuBRYBWD6xBPjLEuSdKYRg6HqvpfwNk+j3DLWcbsAHacof85BhezJUlTwE9IS5I6hoMkqWM4\nSJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6\nhoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMk\nqWM4SJI6hoMkqWM4SJI6hoMkqbNy0gu41Fy3/bcWdb57bzzFz5zHnEfu/+iiPq+k5W1qXjkk2ZTk\nUJLDSbZPej2SdCmbinBIsgL4z8CPAxuAn0qyYbKrkqRL17ScVroJOFxVfwiQZDewGXhpoqtaRhb7\ndNaF8JSWtPRMSzisAV4fenwU+DsTWosW2fkG0/lePzlfhpI0umkJh/OSZBuwrT08meTQiFNdA/zx\n4qxqsn7WWs4qn1qsmS7YsjkmWMu0GqeWv34+O01LOBwDrh16vLb1fYeq2gnsHPfJkjxXVbPjzjMN\nrGX6LJc6wFqm1cWoZSouSAO/D6xPsi7Je4EtwL4Jr0mSLllT8cqhqk4l+efA/wRWAJ+pqoMTXpYk\nXbKmIhwAquq3gd++SE839qmpKWIt02e51AHWMq2+67Wkqr7bzyFJWmKm5ZqDJGmKXHLhsNS+piPJ\nkSQHkryQ5LnWd3WSJ5O80u6vGtr/vlbboSS3Tm7lkOQzSU4keXGo74LXnmRj+xkcTvJgkkxJLb+Q\n5Fg7Ni8k+ci015Lk2iRPJXkpycEkn2j9S+64vEstS/G4vC/Js0m+3Gr5d61/cselqi6ZG4OL3V8F\nfgB4L/BlYMOk13WONR8BrlnQ9++B7a29HfhUa29oNV0GrGu1rpjg2j8EfBB4cZy1A88CNwMBngB+\nfEpq+QXgX55h36mtBVgNfLC1vxf43229S+64vEstS/G4BLiitd8DPNPWM7Hjcqm9cvj/X9NRVX8B\nnP6ajqVmM7CrtXcBtw31766qt6vqVeAwg5onoqq+AHx9QfcFrT3JauDKqnq6Bv/yHxsac9GcpZaz\nmdpaqup4VX2xtf8MeJnBNxQsuePyLrWczTTXUlV1sj18T7sVEzwul1o4nOlrOt7tH9M0KOB3kzyf\nwSfEAWaq6nhrvwHMtPZSqO9C176mtRf2T4uPJ/lKO+10+iX/kqglyXXA32bwV+qSPi4LaoEleFyS\nrEjyAnACeLKqJnpcLrVwWIp+tKp+mME31t6T5EPDG9tfB0vyLWdL
ee3NwwxOUf4wcBx4YLLLOX9J\nrgB+A/i5qnpreNtSOy5nqGVJHpeqeqf9rq9l8CrghgXbL+pxudTC4by+pmOaVNWxdn8C+E0Gp4ne\nbC8fafcn2u5Lob4LXfux1l7YP3FV9Wb7hf5L4L/wV6fwprqWJO9h8J/p41X1+da9JI/LmWpZqsfl\ntKr6BvAUsIkJHpdLLRyW1Nd0JLk8yfeebgP/EHiRwZq3tt22Antbex+wJcllSdYB6xlcnJomF7T2\n9pL6rSQ3t3dd3DE0ZqJO/9I2P8ng2MAU19Ke9xHg5ar6xaFNS+64nK2WJXpcvj/JB1p7FfAPgD9g\nksflYl6Rn4Yb8BEG72r4KvDzk17POdb6AwzekfBl4ODp9QLfB+wHXgF+F7h6aMzPt9oOMYF39SxY\n/2cZvKz/NoNzn3eOsnZglsEv+FeBX6Z9eHMKavlV4ADwlfbLunraawF+lMGpia8AL7TbR5bicXmX\nWpbicfkh4EttzS8C/7b1T+y4+AlpSVLnUjutJEk6D4aDJKljOEiSOoaDJKljOEiSOoaDJKljOEiS\nOoaDJKnz/wDtS/i00cr5TAAAAABJRU5ErkJggg==\n"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of how many training rows each distinct item_id occurs in.\n",
    "train[\"item_id\"].value_counts().hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x19ec1e9fb00>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD4CAYAAAAQP7oXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADopJREFUeJzt3V+InXV+x/H3Z5NdKyzaRIdgE7cjmFKi0F0MUdibsqFJ\niqXxQiULraEEc6ELu1BoY29ClYDe1CJUQWow2rIabMHgYiXElVJao2Nr10abZqhaDWqymVS7F9om\n++3F/EZPziaZX/44J3/eLzic5/zO83vmdyD45jnPM2OqCkmSenxl1AuQJJ0/jIYkqZvRkCR1MxqS\npG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3eaPegFn25VXXlnj4+OjXoYknVdee+21n1bV2Gz7XXDR\nGB8fZ2JiYtTLkKTzSpJ3e/bz6ylJUjejIUnqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiSul1w\nv9x3vhjf9KNRL+GC8s79N496CdJFwTMNSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhI\nkroZDUlSN6MhSepmNCRJ3YyGJKmb0ZAkdTMakqRuRkOS1M1oSJK6GQ1JUjejIUnqZjQkSd2MhiSp\nm9GQJHUzGpKkbkZDktTNaEiSuhkNSVI3oyFJ6mY0JEnduqORZF6Sf0nyXHu9MMnOJPva84KBfe9J\nMplkb5LVA+M3JHmjvfdQkrTxS5I83cZ3JxkfmLO+/Yx9SdafjQ8tSTo9p3Km8X3grYHXm4BdVbUU\n2NVek2QZsA64DlgDPJxkXpvzCHAnsLQ91rTxDcDhqroWeBB4oB1rIbAZuBFYAWwejJMkaW51RSPJ\nEuBm4C8HhtcC29r2NuCWgfGnquqzqnobmARWJLkKuKyqXq6qAp4YmjNzrGeAle0sZDWws6qmquow\nsJMvQiNJmmO9Zxp/DvwR8POBsUVV9UHb/hBY1LYXA+8N7Pd+G1vctofHj5lTVUeAj4ErTnKsYyTZ\nmGQiycTBgwc7P5Ik6VTNGo0kvwMcqKrXTrRPO3Oos7mwU1FVj1bV8qpaPjY2NqplSNIFr+dM49vA\n7yZ5B3gK+E6SvwI+al850Z4PtP33A1cPzF/Sxva37eHxY+YkmQ9cDhw6ybEkSSMwazSq6p6qWlJV\n40xf4H6xqn4P2AHM3M20Hni2be8A1rU7oq5h+oL3K+2rrE+S3NSuV9wxNGfmWLe2n1HAC8CqJAva\nBfBVbUySNALzz2Du/cD2JBuAd4HbAapqT5LtwJvAEeDuqjra5twFPA5cCjzfHgCPAU8mmQSmmI4T\nVTWV5D7g1bbfvVU1dQZrliSdgVOKRlW9BLzUtg8BK0+w3xZgy3HGJ4DrjzP+KXDbCY61Fdh6KuuU\nJH05/I1wSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3YyG\nJKmb0ZAkdTMakqRuRkOS1M1oSJK6GQ1JUjejIUnqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiS\nuhkNSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3WaNRpJf\nSvJKkn9NsifJn7bxhUl2JtnXnhcMzLknyWSSvUlWD4zfkOSN9t5DSdLGL0nydBvfnWR8YM769jP2\nJVl/Nj+8JOnU9JxpfAZ8p6p+A/gmsCbJTcAmYFdVLQV2tdckWQasA64D1gAPJ5nXjvUIcCewtD3W\ntPENwOGquhZ4EHigHWshsBm4EVgBbB6MkyRpbs0ajZr2s/byq+1RwFpgWxvfBtzSttcCT1XVZ1X1\nNjAJrEhyFXBZVb1cVQU8MTRn5ljPACvbWchqYGdVTVXVYWAnX4RGkjTHuq5pJJmX5HXgANP/Ed8N\nLKqqD9ouHwKL2vZi4L2B6e+3scVte3
j8mDlVdQT4GLjiJMcaXt/GJBNJJg4ePNjzkSRJp6ErGlV1\ntKq+CSxh+qzh+qH3i+mzj5GoqkeranlVLR8bGxvVMiTpgndKd09V1X8DP2b6K6KP2ldOtOcDbbf9\nwNUD05a0sf1te3j8mDlJ5gOXA4dOcixJ0gj03D01luSX2/alwG8B/w7sAGbuZloPPNu2dwDr2h1R\n1zB9wfuV9lXWJ0luatcr7hiaM3OsW4EX29nLC8CqJAvaBfBVbUySNALzO/a5CtjW7oD6CrC9qp5L\n8k/A9iQbgHeB2wGqak+S7cCbwBHg7qo62o51F/A4cCnwfHsAPAY8mWQSmGL67iuqairJfcCrbb97\nq2rqTD6wJOn0zRqNqvoJ8K3jjB8CVp5gzhZgy3HGJ4DrjzP+KXDbCY61Fdg62zolSV8+fyNcktTN\naEiSuhkNSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3YyG\nJKmb0ZAkdTMakqRuRkOS1M1oSJK6GQ1JUjejIUnqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiS\nuhkNSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhIkroZDUlSt1mjkeTqJD9O8maSPUm+\n38YXJtmZZF97XjAw554kk0n2Jlk9MH5Dkjfaew8lSRu/JMnTbXx3kvGBOevbz9iXZP3Z/PCSpFPT\nc6ZxBPjDqloG3ATcnWQZsAnYVVVLgV3tNe29dcB1wBrg4STz2rEeAe4ElrbHmja+AThcVdcCDwIP\ntGMtBDYDNwIrgM2DcZIkza1Zo1FVH1TVP7ft/wHeAhYDa4FtbbdtwC1tey3wVFV9VlVvA5PAiiRX\nAZdV1ctVVcATQ3NmjvUMsLKdhawGdlbVVFUdBnbyRWgkSXPslK5ptK+NvgXsBhZV1QftrQ+BRW17\nMfDewLT329jitj08fsycqjoCfAxccZJjDa9rY5KJJBMHDx48lY8kSToF3dFI8nXgb4AfVNUng++1\nM4c6y2vrVlWPVtXyqlo+NjY2qmVI0gWvKxpJvsp0MP66qv62DX/UvnKiPR9o4/uBqwemL2lj+9v2\n8Pgxc5LMBy4HDp3kWJKkEei5eyrAY8BbVfVnA2/tAGbuZloPPDswvq7dEXUN0xe8X2lfZX2S5KZ2\nzDuG5swc61bgxXb28gKwKsmCdgF8VRuTJI3A/I59vg38PvBGktfb2J8A9wPbk2wA3gVuB6iqPUm2\nA28yfefV3VV1tM27C3gcuBR4vj1gOkpPJpkEppi++4qqmkpyH/Bq2+/eqpo6zc8qSTpDs0ajqv4B\nyAneXnmCOVuALccZnwCuP874p8BtJzjWVmDrbOuUJH35/I1wSVI3oyFJ6mY0JEndjIYkqZvRkCR1\nMxqSpG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3YyGJKmb0ZAkdTMakqRuRkOS1M1oSJK6GQ1JUjej\nIUnqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiSuhkNSVI3oyFJ6mY0JEndjIYkqZvRkCR1MxqS\npG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3WaNRpKtSQ4k+beBsYVJdibZ154XDLx3T5LJJHuTrB4Y\nvyHJG+29h5KkjV+S5Ok2vjvJ+MCc9e1n7Euy/mx9aEnS6ek503gcWDM0tgnYVVVLgV3tNUmWAeuA\n69qch5PMa3MeAe4ElrbHzDE3AIer6lrgQeCBdqyFwGbgRmAFsHkwTpKkuTdrNKrq74GpoeG1wLa2\nvQ24ZWD8qar6rKreBiaBFUmuAi6rqperqoAnhubMHOsZYGU7C1kN7Kyqqao6DOzkF+MlSZpDp3tN\nY1FVfdC2PwQWte3FwHsD+73fxha37eHxY+ZU1RHgY+CKkxzrFyTZmGQiycTBgwdP8yNJkmZzxhfC\n25
lDnYW1nMkaHq2q5VW1fGxsbJRLkaQL2ulG46P2lRPt+UAb3w9cPbDfkja2v20Pjx8zJ8l84HLg\n0EmOJUkakdONxg5g5m6m9cCzA+Pr2h1R1zB9wfuV9lXWJ0luatcr7hiaM3OsW4EX29nLC8CqJAva\nBfBVbUySNCLzZ9shyQ+B3wSuTPI+03c03Q9sT7IBeBe4HaCq9iTZDrwJHAHurqqj7VB3MX0n1qXA\n8+0B8BjwZJJJpi+4r2vHmkpyH/Bq2+/eqhq+IC9JmkOzRqOqvnuCt1aeYP8twJbjjE8A1x9n/FPg\nthMcayuwdbY1SpLmhr8RLknqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiSuhkNSVI3oyFJ6mY0\nJEndjIYkqZvRkCR1MxqSpG5GQ5LUzWhIkroZDUlSN6MhSepmNCRJ3YyGJKmb0ZAkdTMakqRuRkOS\n1M1oSJK6GQ1JUjejIUnqZjQkSd2MhiSpm9GQJHUzGpKkbkZDktTNaEiSuhkNSVI3oyFJ6mY0JEnd\n5o96AZLOPeObfjTqJVww3rn/5lEv4aw6L840kqxJsjfJZJJNo16PJF2szvloJJkH/AXw28Ay4LtJ\nlo12VZJ0cTrnowGsACar6j+r6n+Bp4C1I16TJF2UzodrGouB9wZevw/cOLhDko3AxvbyZ0n2ztHa\nLgZXAj8d9SJmkwdGvQKNyDn/7/M8+rf5qz07nQ/RmFVVPQo8Oup1XIiSTFTV8lGvQzoe/33OvfPh\n66n9wNUDr5e0MUnSHDsfovEqsDTJNUm+BqwDdox4TZJ0UTrnv56qqiNJvge8AMwDtlbVnhEv62Li\n1346l/nvc46lqka9BknSeeJ8+HpKknSOMBqSpG5GQ5LUzWhIkrqd83dPaW4l+XWm/0zL4ja0H9hR\nVW+NblWSzhWeaehzSf6Y6b/tFeCV9gjwQ/+6sM5lSf5g1Gu4WHjLrT6X5D+A66rq/4bGvwbsqaql\no1mZdHJJ/quqvjHqdVwM/HpKg34O/Arw7tD4Ve09aWSS/OREbwGL5nItFzOjoUE/AHYl2ccXf1n4\nG8C1wPdGtipp2iJgNXB4aDzAP879ci5ORkOfq6q/S/JrTP8/TAYvhL9aVUdHtzIJgOeAr1fV68Nv\nJHlp7pdzcfKahiSpm3dPSZK6GQ1JUjejIUnqZjQkSd3+Hz7qwi63IYE0AAAAAElFTkSuQmCC\n"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the number of training rows per is_trade value.\n",
    "train[\"is_trade\"].value_counts().plot(kind=\"bar\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instance_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_category_list</th>\n",
       "      <th>item_property_list</th>\n",
       "      <th>item_brand_id</th>\n",
       "      <th>item_city_id</th>\n",
       "      <th>item_price_level</th>\n",
       "      <th>item_sales_level</th>\n",
       "      <th>item_collected_level</th>\n",
       "      <th>item_pv_level</th>\n",
       "      <th>...</th>\n",
       "      <th>context_page_id</th>\n",
       "      <th>predict_category_property</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>shop_review_num_level</th>\n",
       "      <th>shop_review_positive_rate</th>\n",
       "      <th>shop_star_level</th>\n",
       "      <th>shop_score_service</th>\n",
       "      <th>shop_score_delivery</th>\n",
       "      <th>shop_score_description</th>\n",
       "      <th>is_trade</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>7972325483281584825</td>\n",
       "      <td>285660928590172217</td>\n",
       "      <td>7908382889764677758;8277336076276184272</td>\n",
       "      <td>2072967855524022579;5131280576272319091;263639...</td>\n",
       "      <td>9057103201734987852</td>\n",
       "      <td>548352491538518780</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>4879721024980945592:2636395404473730413,719936...</td>\n",
       "      <td>4885989684392199728</td>\n",
       "      <td>15</td>\n",
       "      <td>0.985427</td>\n",
       "      <td>5012</td>\n",
       "      <td>0.974878</td>\n",
       "      <td>0.976863</td>\n",
       "      <td>0.969278</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>5698113918818659664</td>\n",
       "      <td>919980016657888153</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;124376...</td>\n",
       "      <td>7066302540842412840</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>5755694407684602296:9142827274221572643,914848...</td>\n",
       "      <td>6597981382309269962</td>\n",
       "      <td>21</td>\n",
       "      <td>0.997040</td>\n",
       "      <td>5018</td>\n",
       "      <td>0.979661</td>\n",
       "      <td>0.979589</td>\n",
       "      <td>0.975442</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>3398773118902833938</td>\n",
       "      <td>557883074900282934</td>\n",
       "      <td>7908382889764677758;8277336076276184272</td>\n",
       "      <td>2636395404473730413;6434796455031995313;643479...</td>\n",
       "      <td>7066302540842412840</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>7</td>\n",
       "      <td>12</td>\n",
       "      <td>14</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>8277336076276184272:2636395404473730413;176016...</td>\n",
       "      <td>6597981382309269962</td>\n",
       "      <td>21</td>\n",
       "      <td>0.997040</td>\n",
       "      <td>5018</td>\n",
       "      <td>0.979661</td>\n",
       "      <td>0.979589</td>\n",
       "      <td>0.975442</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>8127284011992533394</td>\n",
       "      <td>1093710751022752245</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;101341...</td>\n",
       "      <td>7066302540842412840</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>4012</td>\n",
       "      <td>509660095530134768:2636395404473730413;8277336...</td>\n",
       "      <td>6597981382309269962</td>\n",
       "      <td>21</td>\n",
       "      <td>0.997040</td>\n",
       "      <td>5018</td>\n",
       "      <td>0.979661</td>\n",
       "      <td>0.979589</td>\n",
       "      <td>0.975442</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>7059890618965359082</td>\n",
       "      <td>2540781378235236842</td>\n",
       "      <td>7908382889764677758;8277336076276184272</td>\n",
       "      <td>2072967855524022579;2636395404473730413;124376...</td>\n",
       "      <td>7066302540842412840</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>7</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>4001</td>\n",
       "      <td>8277336076276184272:2636395404473730413;176016...</td>\n",
       "      <td>6597981382309269962</td>\n",
       "      <td>21</td>\n",
       "      <td>0.997040</td>\n",
       "      <td>5018</td>\n",
       "      <td>0.979661</td>\n",
       "      <td>0.979589</td>\n",
       "      <td>0.975442</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the training rows whose is_trade value equals 1, then preview them.\n",
    "train_istrade = train.loc[train[\"is_trade\"] == 1]\n",
    "train_istrade.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Histogram of per-item row counts, restricted to rows with is_trade == 1.\n",
    "# Depends on train_istrade, which is assigned in the preceding cell.\n",
    "train_istrade.item_id.value_counts().hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instance_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_category_list</th>\n",
       "      <th>item_property_list</th>\n",
       "      <th>item_brand_id</th>\n",
       "      <th>item_city_id</th>\n",
       "      <th>item_price_level</th>\n",
       "      <th>item_sales_level</th>\n",
       "      <th>item_collected_level</th>\n",
       "      <th>item_pv_level</th>\n",
       "      <th>...</th>\n",
       "      <th>context_timestamp</th>\n",
       "      <th>context_page_id</th>\n",
       "      <th>predict_category_property</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>shop_review_num_level</th>\n",
       "      <th>shop_review_positive_rate</th>\n",
       "      <th>shop_star_level</th>\n",
       "      <th>shop_score_service</th>\n",
       "      <th>shop_score_delivery</th>\n",
       "      <th>shop_score_description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2475218615076601065</td>\n",
       "      <td>2275895163219263378</td>\n",
       "      <td>7908382889764677758;8277336076276184272</td>\n",
       "      <td>2636395404473730413;7515802706813700848;302719...</td>\n",
       "      <td>7838285046767229711</td>\n",
       "      <td>7534238860363577544</td>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>1537885700</td>\n",
       "      <td>4005</td>\n",
       "      <td>8277336076276184272:-1;5799347067982556520:-1;...</td>\n",
       "      <td>5275548996133547595</td>\n",
       "      <td>13</td>\n",
       "      <td>0.985244</td>\n",
       "      <td>5011</td>\n",
       "      <td>0.964023</td>\n",
       "      <td>0.966550</td>\n",
       "      <td>0.955378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>398316874173557226</td>\n",
       "      <td>7096238490711246967</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;734498...</td>\n",
       "      <td>8126195666233054089</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>1537862843</td>\n",
       "      <td>4001</td>\n",
       "      <td>5755694407684602296:216555019471416386,2636395...</td>\n",
       "      <td>329470548383752413</td>\n",
       "      <td>14</td>\n",
       "      <td>0.989068</td>\n",
       "      <td>5012</td>\n",
       "      <td>0.963794</td>\n",
       "      <td>0.964225</td>\n",
       "      <td>0.959134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6586402638209028583</td>\n",
       "      <td>7096238490711246967</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;734498...</td>\n",
       "      <td>8126195666233054089</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>1537858222</td>\n",
       "      <td>4011</td>\n",
       "      <td>5755694407684602296:2636395404473730413;871073...</td>\n",
       "      <td>329470548383752413</td>\n",
       "      <td>14</td>\n",
       "      <td>0.989068</td>\n",
       "      <td>5012</td>\n",
       "      <td>0.963794</td>\n",
       "      <td>0.964225</td>\n",
       "      <td>0.959134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1040996105851528465</td>\n",
       "      <td>7096238490711246967</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;734498...</td>\n",
       "      <td>8126195666233054089</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>1537860915</td>\n",
       "      <td>4001</td>\n",
       "      <td>8710739180200009128:-1;5755694407684602296:263...</td>\n",
       "      <td>329470548383752413</td>\n",
       "      <td>14</td>\n",
       "      <td>0.989068</td>\n",
       "      <td>5012</td>\n",
       "      <td>0.963794</td>\n",
       "      <td>0.964225</td>\n",
       "      <td>0.959134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6316278569655873454</td>\n",
       "      <td>7096238490711246967</td>\n",
       "      <td>7908382889764677758;5755694407684602296</td>\n",
       "      <td>5131280576272319091;2636395404473730413;734498...</td>\n",
       "      <td>8126195666233054089</td>\n",
       "      <td>3948283326616421003</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>1537859074</td>\n",
       "      <td>4001</td>\n",
       "      <td>5755694407684602296:5131280576272319091;871073...</td>\n",
       "      <td>329470548383752413</td>\n",
       "      <td>14</td>\n",
       "      <td>0.989068</td>\n",
       "      <td>5012</td>\n",
       "      <td>0.963794</td>\n",
       "      <td>0.964225</td>\n",
       "      <td>0.959134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns of the training frame that are kept as model inputs,\n",
    "# listed one per line and grouped by the entity they describe.\n",
    "select_cols = [\n",
    "    # item\n",
    "    'item_price_level',\n",
    "    'item_sales_level',\n",
    "    'item_collected_level',\n",
    "    'item_pv_level',\n",
    "    # user\n",
    "    'user_gender_id',\n",
    "    'user_age_level',\n",
    "    'user_star_level',\n",
    "    # context\n",
    "    'context_page_id',\n",
    "    # shop\n",
    "    'shop_review_num_level',\n",
    "    'shop_review_positive_rate',\n",
    "    'shop_star_level',\n",
    "    'shop_score_service',\n",
    "    'shop_score_delivery',\n",
    "    'shop_score_description',\n",
    "]\n",
    "\n",
    "# Subset of train restricted to the selected columns.\n",
    "select_train = train[select_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_price_level</th>\n",
       "      <th>item_sales_level</th>\n",
       "      <th>item_collected_level</th>\n",
       "      <th>item_pv_level</th>\n",
       "      <th>user_gender_id</th>\n",
       "      <th>user_age_level</th>\n",
       "      <th>user_star_level</th>\n",
       "      <th>context_page_id</th>\n",
       "      <th>shop_review_num_level</th>\n",
       "      <th>shop_review_positive_rate</th>\n",
       "      <th>shop_star_level</th>\n",
       "      <th>shop_score_service</th>\n",
       "      <th>shop_score_delivery</th>\n",
       "      <th>shop_score_description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>1003</td>\n",
       "      <td>3003</td>\n",
       "      <td>4006</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1002</td>\n",
       "      <td>3006</td>\n",
       "      <td>4001</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1003</td>\n",
       "      <td>3004</td>\n",
       "      <td>4001</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>1004</td>\n",
       "      <td>3006</td>\n",
       "      <td>4016</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1002</td>\n",
       "      <td>3001</td>\n",
       "      <td>4001</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows of the selected columns.\n",
    "select_train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC\n",
    "\n",
    "# Feature matrix and target column, both taken from the training frame.\n",
    "X = train[select_cols]\n",
    "Y = train['is_trade']\n",
    "\n",
    "# Hold out 40% of the rows for evaluation; the fixed seed keeps the split reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=0)\n",
    "\n",
    "# probability=True is required here: without it SVC does not provide predict_proba,\n",
    "# so the prediction step below would raise AttributeError. The probability estimates\n",
    "# are calibrated with an internal cross-validation, hence the fixed random_state.\n",
    "# NOTE(review): this makes fitting noticeably slower; confirm it is acceptable for the size of train.\n",
    "model = SVC(probability=True, random_state=0)\n",
    "print(\"Training...\")\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "print(\"Predicting...\")\n",
    "# predict_proba returns one column per class, ordered as in model.classes_.\n",
    "# Column 1 is used as the probability of is_trade == 1 (assumes the labels are 0 and 1 - TODO confirm).\n",
    "y_prediction = model.predict_proba(X_test)\n",
    "test_pred = y_prediction[:, 1]\n",
    "print('log loss:', log_loss(y_test, test_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
